; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize,dce,instcombine,simplifycfg -S -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,mve4beat -simplifycfg-require-and-preserve-domtree=1 -tail-predication=disabled < %s | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-none-none-eabi"

define hidden void @pointer_phi_v4i32_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %s, i32%y) {
; CHECK-LABEL: @pointer_phi_v4i32_add1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load i32, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 1
  %add = add nsw i32 %0, %y
  store i32 %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4i32_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v4i32_add2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 7968
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 3
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR_09]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_09]], i32 2
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[Y]]
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_07]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_07]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load i32, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 2
  %add = add nsw i32 %0, %y
  store i32 %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v4i32_add3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 11952
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 12, i32 24, i32 36>
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR_09]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_09]], i32 3
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[Y]]
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_07]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_07]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load i32, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 3
  %add = add nsw i32 %0, %y
  store i32 %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v8i16_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v8i16_add1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP2]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[NEXT_GEP4]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %0 = trunc i32 %y to i16
  br label %for.body
for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %l1 = load i16, ptr %A.addr.011, align 2
  %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 1
  %conv1 = add i16 %l1, %0
  store i16 %conv1, ptr %B.addr.09, align 2
  %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1
  %inc = add nuw nsw i32 %i.010, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v8i16_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v8i16_add2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 3968
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP2]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[NEXT_GEP4]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_011:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_09:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[L1:%.*]] = load i16, ptr [[A_ADDR_011]], align 2
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, ptr [[A_ADDR_011]], i32 2
; CHECK-NEXT:    [[CONV1:%.*]] = add i16 [[L1]], [[TMP0]]
; CHECK-NEXT:    store i16 [[CONV1]], ptr [[B_ADDR_09]], align 2
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[B_ADDR_09]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_010]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %0 = trunc i32 %y to i16
  br label %for.body
for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %l1 = load i16, ptr %A.addr.011, align 2
  %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 2
  %conv1 = add i16 %l1, %0
  store i16 %conv1, ptr %B.addr.09, align 2
  %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1
  %inc = add nuw nsw i32 %i.010, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v8i16_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v8i16_add3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_011:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_09:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[L1:%.*]] = load i16, ptr [[A_ADDR_011]], align 2
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, ptr [[A_ADDR_011]], i32 3
; CHECK-NEXT:    [[CONV1:%.*]] = add i16 [[L1]], [[TMP0]]
; CHECK-NEXT:    store i16 [[CONV1]], ptr [[B_ADDR_09]], align 2
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[B_ADDR_09]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_010]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %0 = trunc i32 %y to i16
  br label %for.body
for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %l1 = load i16, ptr %A.addr.011, align 2
  %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 3
  %conv1 = add i16 %l1, %0
  store i16 %conv1, ptr %B.addr.09, align 2
  %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1
  %inc = add nuw nsw i32 %i.010, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v16i8_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v16i8_add1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 992
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 992
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[NEXT_GEP4]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[A_ADDR_010]], align 1
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 1
; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP3]], [[TMP0]]
; CHECK-NEXT:    store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_09]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %0 = trunc i32 %y to i8
  br label %for.body

for.body:
  %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %1 = load i8, ptr %A.addr.010, align 1
  %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 1
  %conv1 = add i8 %1, %0
  store i8 %conv1, ptr %B.addr.08, align 1
  %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v16i8_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v16i8_add2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 1984
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 992
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]]
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[NEXT_GEP4]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[A_ADDR_010]], align 1
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 2
; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP4]], [[TMP0]]
; CHECK-NEXT:    store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_09]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %0 = trunc i32 %y to i8
  br label %for.body

for.body:
  %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %1 = load i8, ptr %A.addr.010, align 1
  %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 2
  %conv1 = add i8 %1, %0
  store i8 %conv1, ptr %B.addr.08, align 1
  %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v16i8_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v16i8_add3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR_010]], align 1
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 3
; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_09]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %0 = trunc i32 %y to i8
  br label %for.body

for.body:
  %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %1 = load i8, ptr %A.addr.010, align 1
  %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 3
  %conv1 = add i8 %1, %0
  store i8 %conv1, ptr %B.addr.08, align 1
  %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4f32_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
; CHECK-LABEL: @pointer_phi_v4f32_add1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load float, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 1
  %add = fadd fast float %0, %y
  store float %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4f32_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
; CHECK-LABEL: @pointer_phi_v4f32_add2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 7968
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 3
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[A_ADDR_09]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_09]], i32 2
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[Y]]
; CHECK-NEXT:    store float [[ADD]], ptr [[B_ADDR_07]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[B_ADDR_07]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load float, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 2
  %add = fadd fast float %0, %y
  store float %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
; CHECK-LABEL: @pointer_phi_v4f32_add3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 11952
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 12, i32 24, i32 36>
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[A_ADDR_09]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_09]], i32 3
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[Y]]
; CHECK-NEXT:    store float [[ADD]], ptr [[B_ADDR_07]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[B_ADDR_07]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load float, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 3
  %add = fadd fast float %0, %y
  store float %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4half_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) {
; CHECK-LABEL: @pointer_phi_v4half_add1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load half, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 1
  %add = fadd fast half %0, %y
  store half %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4half_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) {
; CHECK-LABEL: @pointer_phi_v4half_add2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 3968
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x half>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[A_ADDR_09]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds half, ptr [[A_ADDR_09]], i32 2
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP4]], [[Y]]
; CHECK-NEXT:    store half [[ADD]], ptr [[B_ADDR_07]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds half, ptr [[B_ADDR_07]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load half, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 2
  %add = fadd fast half %0, %y
  store half %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

define hidden void @pointer_phi_v4half_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) {
; CHECK-LABEL: @pointer_phi_v4half_add3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 5952
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[INDEX]], 6
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x half>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x half> [[WIDE_VEC]], <24 x half> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[A_ADDR_09]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds half, ptr [[A_ADDR_09]], i32 3
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP4]], [[Y]]
; CHECK-NEXT:    store half [[ADD]], ptr [[B_ADDR_07]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds half, ptr [[B_ADDR_07]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body
for.body:
  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
  %0 = load half, ptr %A.addr.09, align 4
  %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 3
  %add = fadd fast half %0, %y
  store half %add, ptr %B.addr.07, align 4
  %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body
end:
  ret void
}

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.interleave.count", i32 2}

define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %n, i32 %y) {
; CHECK-LABEL: @pointer_phi_v4i32_uf2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 239808
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 39968
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP2]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT7]]
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 192
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9992
; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_08:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 9992, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_06:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A_ADDR_08]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_08]], i32 6
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[Y]]
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_06]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_06]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;

entry:
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %A.addr.08 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.06 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %0 = load i32, ptr %A.addr.08, align 4
  %add.ptr = getelementptr inbounds i32, ptr %A.addr.08, i32 6
  %add = add nsw i32 %0, %y
  store i32 %add, ptr %B.addr.06, align 4
  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.06, i32 1
  %inc = add nuw nsw i32 %i.07, 1
  %exitcond = icmp eq i32 %inc, 10000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}

!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.interleave.count", i32 4}

define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %n, i32 %y) {
; CHECK-LABEL: @pointer_phi_v4i32_uf4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 239616
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 39936
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT12]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 192, i32 216, i32 240, i32 264>
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 288, i32 312, i32 336, i32 360>
; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP4]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT11]]
; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT13]]
; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER9]], [[BROADCAST_SPLAT15]]
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP9]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8
; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP10]], align 4
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12
; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP11]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 384
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9984
; CHECK-NEXT:    br i1 [[TMP12]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[A_ADDR_08:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 9984, [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[B_ADDR_06:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[A_ADDR_08]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_08]], i32 6
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[Y]]
; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_06]], align 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_06]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %A.addr.08 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
  %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %B.addr.06 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
  %0 = load i32, ptr %A.addr.08, align 4
  %add.ptr = getelementptr inbounds i32, ptr %A.addr.08, i32 6
  %add = add nsw i32 %0, %y
  store i32 %add, ptr %B.addr.06, align 4
  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.06, i32 1
  %inc = add nuw nsw i32 %i.07, 1
  %exitcond = icmp eq i32 %inc, 10000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !2
}

define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias nocapture %z) {
; CHECK-LABEL: @mult_ptr_iv(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Z:%.*]], i32 3000
; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i32 3000
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[Z]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[X]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[X]], i32 3000
; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[Z]], i32 3000
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[POINTER_PHI5:%.*]] = phi ptr [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2
; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], <i8 10, i8 10, i8 10, i8 10>
; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 1
; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 2
; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
; CHECK-NEXT:    [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i32 12
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK:       for.body:
; CHECK-NEXT:    [[X_ADDR_050:%.*]] = phi ptr [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[Z_ADDR_049:%.*]] = phi ptr [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ]
; CHECK-NEXT:    [[I_048:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 1
; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[X_ADDR_050]], align 1
; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 2
; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT:    [[INCDEC_PTR2]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 3
; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[INCDEC_PTR1]], align 1
; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[TMP10]], 10
; CHECK-NEXT:    [[MUL1:%.*]] = mul i8 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[MUL2:%.*]] = mul i8 [[TMP10]], [[TMP12]]
; CHECK-NEXT:    [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 1
; CHECK-NEXT:    store i8 [[MUL]], ptr [[Z_ADDR_049]], align 1
; CHECK-NEXT:    [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 2
; CHECK-NEXT:    store i8 [[MUL1]], ptr [[INCDEC_PTR32]], align 1
; CHECK-NEXT:    [[INCDEC_PTR34]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 3
; CHECK-NEXT:    store i8 [[MUL2]], ptr [[INCDEC_PTR33]], align 1
; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_048]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %x.addr.050 = phi ptr [ %incdec.ptr2, %for.body ], [ %x, %entry ]
  %z.addr.049 = phi ptr [ %incdec.ptr34, %for.body ], [ %z, %entry ]
  %i.048 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %incdec.ptr = getelementptr inbounds i8, ptr %x.addr.050, i32 1
  %0 = load i8, ptr %x.addr.050, align 1
  %incdec.ptr1 = getelementptr inbounds i8, ptr %x.addr.050, i32 2
  %1 = load i8, ptr %incdec.ptr, align 1
  %incdec.ptr2 = getelementptr inbounds i8, ptr %x.addr.050, i32 3
  %2 = load i8, ptr %incdec.ptr1, align 1
  %conv = zext i8 %0 to i32
  %mul = mul nuw nsw i32 %conv, 10
  %conv1 = zext i8 %1 to i32
  %conv2 = zext i8 %2 to i32
  %mul1 = mul nuw nsw i32 %conv, %conv1
  %mul2 = mul nuw nsw i32 %conv, %conv2
  %conv3 = trunc i32 %mul to i8
  %conv4 = trunc i32 %mul1 to i8
  %conv5 = trunc i32 %mul2 to i8
  %incdec.ptr32 = getelementptr inbounds i8, ptr %z.addr.049, i32 1
  store i8 %conv3, ptr %z.addr.049, align 1
  %incdec.ptr33 = getelementptr inbounds i8, ptr %z.addr.049, i32 2
  store i8 %conv4, ptr %incdec.ptr32, align 1
  %incdec.ptr34 = getelementptr inbounds i8, ptr %z.addr.049, i32 3
  store i8 %conv5, ptr %incdec.ptr33, align 1
  %inc = add nuw i32 %i.048, 1
  %exitcond = icmp eq i32 %inc, 1000
  br i1 %exitcond, label %end, label %for.body

end:
  ret void
}
